package snippets.cse524.activeDates;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.Map;
import java.util.Scanner;

import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;

import snippets.utils.Utils;

/**
 * Used to parse the output of GenderEvaluatorMR.
 * <p>
 * Switches the page title from the output to the canonical entities from Vincent's list.
 *
 * @author sandesh247
 */
public class FilterBirthdates {

	/**
	 * Filters a tab-separated file, replacing the first column of every line with its
	 * canonical name and dropping lines whose first column has no canonical name.
	 * <p>
	 * The lookup key is the first column, trimmed and lower-cased. Matching lines are
	 * written to standard output; progress messages go to standard error.
	 *
	 * @param args {@code args[0]} is the path of the file handed to
	 *             {@code Utils.readCanonicalNames}; {@code args[1]} is the path of the
	 *             tab-separated file to filter
	 * @throws FileNotFoundException if either file cannot be opened
	 * @throws IllegalArgumentException if fewer than two arguments are supplied
	 * @throws UncheckedIOException if reading the second file fails part-way, or if the
	 *             first file cannot be closed
	 */
	public static void main(String[] args) throws FileNotFoundException {
		if (args == null || args.length < 2) {
			throw new IllegalArgumentException(
					"Usage: FilterBirthdates <canonical-names-file> <mappings-file>");
		}

		System.err.println("Reading canonical names ...");
		Map<String, String> canonicalNames = loadCanonicalNames(args[0]);

		System.err.println("Reading news mappings ...");
		try (Scanner in = new Scanner(new File(args[1]))) {
			while (in.hasNextLine()) {
				String rewritten = canonicalize(in.nextLine(), canonicalNames);
				if (rewritten != null) {
					System.out.println(rewritten);
				}
			}

			// Scanner swallows read errors and simply reports "no more lines"; surface them
			// so that a truncated output is not mistaken for a complete one.
			IOException readError = in.ioException();
			if (readError != null) {
				throw new UncheckedIOException("Failed while reading " + args[1], readError);
			}
		}
	}

	/**
	 * Opens {@code path}, passes the stream to {@code Utils.readCanonicalNames} and closes
	 * the stream afterwards.
	 *
	 * @param path path of the canonical names file
	 * @return the map returned by {@code Utils.readCanonicalNames}
	 * @throws FileNotFoundException if the file cannot be opened
	 */
	private static Map<String, String> loadCanonicalNames(String path)
			throws FileNotFoundException {
		try (FileInputStream stream = new FileInputStream(new File(path))) {
			return Utils.readCanonicalNames(stream);
		} catch (FileNotFoundException e) {
			throw e;
		} catch (IOException e) {
			// Only close() can raise a plain IOException here; main's signature declares
			// FileNotFoundException only, so report it unchecked.
			throw new UncheckedIOException("Failed to close " + path, e);
		}
	}

	/**
	 * Replaces the first tab-separated field of {@code line} with its canonical name.
	 *
	 * @param line one input line
	 * @param canonicalNames lookup from trimmed, lower-cased first field to canonical name
	 * @return the rewritten line, or {@code null} if the line has no fields or its first
	 *         field has no (non-null) entry in {@code canonicalNames}
	 */
	private static String canonicalize(String line, Map<String, String> canonicalNames) {
		String[] parts = line.split("\t");
		if (parts.length == 0) {
			// split() discards trailing empty fields, so a line consisting only of tabs
			// yields an empty array; there is no entity to look up.
			return null;
		}

		String canonical = canonicalNames.get(parts[0].trim().toLowerCase());
		if (canonical == null) {
			return null;
		}

		// Swap the first field in place; the remaining fields are emitted unchanged.
		parts[0] = canonical;
		return StringUtils.join(parts, "\t");
	}

}
